package com.heima.wemedia.service.impl;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.core.toolkit.StringUtils;
import com.heima.article.IArticleClient;
import com.heima.common.exception.CustomException;
import com.heima.common.redis.CacheService;
import com.heima.common.tess4j.TesseractClient;
import com.heima.file.service.FileStorageService;
import com.heima.model.article.dtos.ArticleDto;
import com.heima.model.common.dtos.ResponseResult;
import com.heima.model.common.enums.AppHttpCodeEnum;
import com.heima.model.wemedia.pojos.WmChannel;
import com.heima.model.wemedia.pojos.WmNews;
import com.heima.model.wemedia.pojos.WmSensitive;
import com.heima.model.wemedia.pojos.WmUser;
import com.heima.utils.common.SensitiveWordUtil;
import com.heima.wemedia.mapper.WmChannelMapper;
import com.heima.wemedia.mapper.WmNewsMapper;
import com.heima.wemedia.mapper.WmSensitiveMapper;
import com.heima.wemedia.mapper.WmUserMapper;
import com.heima.wemedia.service.BaiDuScanService;
import com.heima.wemedia.service.WmAutoScanService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Automatic review of we-media news articles.
 *
 * <p>For a given article the service collects all reviewable text (title, text
 * paragraphs, text recognised in content images via OCR, labels) and all image
 * URLs (content images and cover images), runs them through the self-managed
 * sensitive-word filter and the Baidu review service, and - when everything
 * passes - pushes the article to the article micro-service and marks it as
 * published.
 *
 * @author 陈辉
 * @date 2023 11:30
 */
@Service
@Transactional
@Slf4j
public class WmAutoScanServiceImpl implements WmAutoScanService {

    /** Review verdict returned by {@link BaiDuScanService}: content is non-compliant. */
    private static final int CONCLUSION_NON_COMPLIANT = 2;
    /** Review verdict returned by {@link BaiDuScanService}: content is suspected to be non-compliant. */
    private static final int CONCLUSION_SUSPECTED = 3;
    /** Response code the article micro-service uses to signal success. */
    private static final int SUCCESS_CODE = 200;
    /** Cache key under which the JSON-encoded sensitive word list is stored. */
    private static final String SENSITIVE_CACHE_KEY = "sensitives";

    @Autowired
    private WmNewsMapper wmNewsMapper;
    @Autowired
    private BaiDuScanService baiDuScanService;
    @Autowired
    private IArticleClient iArticleClient;
    @Autowired
    private WmUserMapper wmUserMapper;
    @Autowired
    private WmChannelMapper wmChannelMapper;
    @Autowired
    private WmSensitiveMapper wmSensitiveMapper;
    @Autowired
    private CacheService cacheService;
    @Autowired
    private FileStorageService fileStorageService;
    @Autowired
    private TesseractClient tesseractClient;

    /**
     * Automatically reviews an article and, if it passes, publishes it.
     *
     * <p>When a review step rejects the article its status and reason are
     * updated by that step and the method returns without publishing. When the
     * remote save does not succeed the article is left untouched and a warning
     * is logged.
     *
     * @param newsId id of the article to review
     * @throws CustomException with {@link AppHttpCodeEnum#PARAM_INVALID} when
     *                         the article does not exist or has no content
     */
    @Override
    public void autoScan(Integer newsId) {
        // 1. Load the article; there is nothing to review without content.
        WmNews wmNews = wmNewsMapper.selectById(newsId);
        if (wmNews == null || StringUtils.isBlank(wmNews.getContent())) {
            throw new CustomException(AppHttpCodeEnum.PARAM_INVALID);
        }

        // 2. Extract the text and the image URLs of the article.
        ScanMaterial material = extractTextAndImages(wmNews);

        // 3. Review the text first.
        if (!scanText(material.text, wmNews)) {
            return;
        }

        // 4. Then review the images.
        if (!scanImg(material.imageUrls, wmNews)) {
            return;
        }

        // 5. Call the article micro-service to save the app-side article.
        ResponseResult responseResult = iArticleClient.saveOrUpdateArticle(buildArticleDto(wmNews));
        if (responseResult == null
                || responseResult.getCode() == null
                || !responseResult.getCode().equals(SUCCESS_CODE)
                || responseResult.getData() == null) {
            log.warn("Saving the app article for wm_news id={} did not succeed, response code={}",
                    newsId, responseResult == null ? null : responseResult.getCode());
            return;
        }

        // 6. Write the returned article id back to wm_news and mark it as published.
        Long articleId = Long.valueOf(responseResult.getData().toString());
        wmNews.setArticleId(articleId);
        updateNewsStatus(wmNews, WmNews.Status.PUBLISHED.getCode(), "审核成功");
    }

    /**
     * Builds the DTO sent to the article micro-service from a reviewed article.
     *
     * @param wmNews the reviewed article
     * @return the DTO with author, layout, channel and (if present) article id filled in
     */
    private ArticleDto buildArticleDto(WmNews wmNews) {
        ArticleDto articleDto = new ArticleDto();
        BeanUtils.copyProperties(wmNews, articleDto);

        // Layout: type --> layout
        articleDto.setLayout(wmNews.getType());

        // Author: user_id --> authorId, plus the author name looked up from the user table.
        // The id is converted only after the null check so a missing user id cannot throw.
        if (wmNews.getUserId() != null) {
            articleDto.setAuthorId(Long.valueOf(wmNews.getUserId()));
            WmUser wmUser = wmUserMapper.selectById(wmNews.getUserId());
            if (wmUser != null) {
                articleDto.setAuthorName(wmUser.getName());
            }
        }

        // Channel name looked up from the channel table.
        if (wmNews.getChannelId() != null) {
            WmChannel wmChannel = wmChannelMapper.selectById(wmNews.getChannelId());
            if (wmChannel != null) {
                articleDto.setChannelName(wmChannel.getName());
            }
        }

        // An existing articleId means the article was synchronised before,
        // so the DTO must carry that id.
        if (wmNews.getArticleId() != null) {
            articleDto.setId(wmNews.getArticleId());
        }
        return articleDto;
    }

    /**
     * Reviews every image with the Baidu review service.
     *
     * @param img    image URLs to review
     * @param wmNews article whose status is updated when an image is rejected
     * @return {@code true} when no image was rejected, {@code false} after the
     *         first image that is non-compliant or suspected
     */
    private boolean scanImg(List<String> img, WmNews wmNews) {
        for (String url : img) {
            int conclusionType = baiDuScanService.scanImg(url);
            if (!applyConclusion(conclusionType, wmNews,
                    "审核失败：图片内容违规!", "自动审核失败：图片内容疑似违规!")) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reviews the text, first with the self-managed sensitive-word filter and
     * then with the Baidu review service.
     *
     * @param text   text to review
     * @param wmNews article whose status is updated when the text is rejected
     * @return {@code true} when the text passed both reviews
     */
    private boolean scanText(String text, WmNews wmNews) {
        // Internal review first.
        if (!handleTextWithDFA(text)) {
            updateNewsStatus(wmNews, WmNews.Status.FAIL.getCode(), "审核失败：文本内容违规!");
            return false;
        }

        // Then the external review.
        int conclusionType = baiDuScanService.scanText(text);
        return applyConclusion(conclusionType, wmNews,
                "审核失败：文本内容违规!", "自动审核失败：文本内容疑似违规!");
    }

    /**
     * Translates a Baidu review verdict into an article status update.
     *
     * @param conclusionType verdict returned by the review service
     * @param wmNews         article to update when the verdict is not a pass
     * @param failReason     reason stored when the content is non-compliant
     * @param suspectReason  reason stored when the content is suspected and
     *                       handed over to manual review
     * @return {@code true} when the verdict is neither non-compliant nor suspected
     */
    private boolean applyConclusion(int conclusionType, WmNews wmNews,
                                    String failReason, String suspectReason) {
        if (conclusionType == CONCLUSION_NON_COMPLIANT) {
            // Non-compliant content: review failed.
            updateNewsStatus(wmNews, WmNews.Status.FAIL.getCode(), failReason);
            return false;
        }
        if (conclusionType == CONCLUSION_SUSPECTED) {
            // Suspected content: hand over to manual review.
            updateNewsStatus(wmNews, WmNews.Status.ADMIN_AUTH.getCode(), suspectReason);
            return false;
        }
        return true;
    }

    /**
     * Checks the text against the self-managed sensitive word list.
     *
     * <p>The word list is read from the cache when present; otherwise it is
     * loaded from the database and written to the cache.
     *
     * @param text text to check
     * @return {@code true} when no sensitive word was matched
     */
    private boolean handleTextWithDFA(String text) {
        List<String> sensitives;

        String cachedJson = cacheService.get(SENSITIVE_CACHE_KEY);
        if (StringUtils.isNotBlank(cachedJson)) {
            // Cache hit: parse with an explicit element type instead of a raw List.
            sensitives = JSON.parseArray(cachedJson, String.class);
        } else {
            // Cache miss: read the sensitive words from the database ...
            List<WmSensitive> wmSensitives = wmSensitiveMapper.selectList(null);
            sensitives = wmSensitives.stream()
                    .map(WmSensitive::getSensitives)
                    .collect(Collectors.toList());
            // ... and store them in the cache.
            cacheService.set(SENSITIVE_CACHE_KEY, JSON.toJSONString(sensitives));
        }

        // Initialise the word dictionary, then match the text against it.
        SensitiveWordUtil.initMap(sensitives);
        Map<String, Integer> matches = SensitiveWordUtil.matchWords(text);

        return matches.isEmpty();
    }

    /**
     * Stores a new status and reason on the article and persists it.
     *
     * @param wmNews article to update
     * @param status new status code
     * @param reason reason shown for the status
     */
    private void updateNewsStatus(WmNews wmNews, short status, String reason) {
        wmNews.setStatus(status);
        wmNews.setReason(reason);
        wmNewsMapper.updateById(wmNews);
    }

    /**
     * Collects everything that has to be reviewed for an article.
     *
     * <p>Text: title, {@code "text"} content items, text recognised by OCR in
     * {@code "image"} content items, and labels. Images: {@code "image"}
     * content items and the comma-separated cover images, de-duplicated in
     * first-seen order. Blank image URLs are skipped.
     *
     * @param wmNews article with non-blank JSON array content
     * @return the text and image URLs to review
     */
    private ScanMaterial extractTextAndImages(WmNews wmNews) {
        StringBuilder text = new StringBuilder();
        // LinkedHashSet keeps first-seen order while dropping duplicate URLs.
        Set<String> imageUrls = new LinkedHashSet<>();

        // Title; skipped when absent so the literal "null" never enters the text.
        if (StringUtils.isNotBlank(wmNews.getTitle())) {
            text.append(wmNews.getTitle());
        }

        // Content items.
        JSONArray jsonArray = JSONArray.parseArray(wmNews.getContent());
        for (Object obj : jsonArray) {
            if (obj == null) {
                continue;
            }
            JSONObject jsonObject = JSONObject.parseObject(obj.toString());
            String type = jsonObject.getString("type");
            if ("text".equals(type)) {
                text.append(jsonObject.getString("value"));
            } else if ("image".equals(type)) {
                String imgUrl = jsonObject.getString("value");
                if (StringUtils.isBlank(imgUrl)) {
                    continue;
                }
                imageUrls.add(imgUrl);

                // OCR the image and add any recognised text to the reviewed text.
                byte[] bytes = fileStorageService.downLoadFile(imgUrl);
                if (bytes == null || bytes.length == 0) {
                    // Nothing to OCR; the URL itself is still reviewed as an image.
                    log.warn("Could not download image for OCR: {}", imgUrl);
                    continue;
                }
                InputStream is = new ByteArrayInputStream(bytes);
                String textInImg = tesseractClient.doOCR(is);
                if (StringUtils.isNotBlank(textInImg)) {
                    text.append(textInImg);
                }
            }
        }

        // Labels.
        if (StringUtils.isNotBlank(wmNews.getLabels())) {
            text.append(wmNews.getLabels());
        }

        // Cover images.
        if (StringUtils.isNotBlank(wmNews.getImages())) {
            for (String cover : wmNews.getImages().split(",")) {
                if (StringUtils.isNotBlank(cover)) {
                    imageUrls.add(cover);
                }
            }
        }

        return new ScanMaterial(text.toString(), new ArrayList<>(imageUrls));
    }

    /** Typed holder for the text and image URLs extracted from an article. */
    private static final class ScanMaterial {
        private final String text;
        private final List<String> imageUrls;

        private ScanMaterial(String text, List<String> imageUrls) {
            this.text = text;
            this.imageUrls = imageUrls;
        }
    }
}
